library(data.table)
library(dplyr)
library(fastDummies)
library(here)

# Qualtrics data --------------------------------------------------------------------

# Read the raw Qualtrics export; fread() takes the column names from the
# first line of the file.
data <- fread(here("data", "CB Int'l - Round 2_July 19, 2023_09.06.csv"))

# Drop the first two data rows (presumably the extra Qualtrics header rows
# holding question text and import ids -- confirm against the export).
# A logical mask is used instead of 3:nrow(data): with fewer than three rows
# that sequence counts backwards (3:2) and would select rows rather than none.
data <- data[seq_len(nrow(data)) > 2L]

# Remove Qualtrics bookkeeping and contact/location columns that are not
# needed downstream; every other column is kept untouched.
data <- data[ , -c('StartDate',
                   'EndDate',
                   'Status',
                   'IPAddress',
                   'Progress',
                   'Duration (in seconds)',
                   'Finished',
                   'RecordedDate',
                   'ResponseId',
                   'RecipientLastName',
                   'RecipientFirstName',
                   'RecipientEmail',
                   'ExternalReference',
                   'LocationLatitude',
                   'LocationLongitude',
                   'DistributionChannel',
                   'UserLanguage')]


# Prolific data ---------------------------------------------------------------------

prolific <- fread(here("data", "prolific_export_6499b35527b63559c60d20f4.csv"))

# Keep only the four Prolific fields used later, then give them short names.
prolific <- prolific[
  ,
  c("Participant id",
    "Age",
    "Sex",
    "Household income (usd) [us participants only]"),
  with = FALSE
]
setnames(prolific, c("PROLIFIC_PID", "age", "sex", "income"))

# Inner join: only respondents present in both sources survive.
fullData <- merge(x = data, y = prolific, by = "PROLIFIC_PID")

# The two inputs are no longer needed once merged.
rm(list = c("data", "prolific"))

# Clean data ------------------------------------------------------------------------

## Consent, attention flag, duplicates, drop-outs -----------------------------------

# Keep only respondents who gave consent.
fullData <- fullData[`consent form` == "I consent, begin the study."]

# Flag attentive respondents (1 = both attention items carry the expected
# answers, 0 = otherwise). Nobody is removed here; the flag is only stored.
fullData$attentive <- ifelse(fullData$atten_2 == 'I understand.' & 
                               fullData$atten_1 == "Extremely interested.,Very interested.",
                             1, 0)

# Remove duplicate respondents: the first row per PROLIFIC_PID is kept.
fullData <- fullData[!duplicated(fullData$PROLIFIC_PID), ]

# Exclude respondents that dropped mid-survey, identified by an empty answer
# to internationalism_3.
# (A leftover debugging expression that re-selected the rows just removed --
# and was therefore always empty -- used to follow this filter.)
fullData <- fullData[!(internationalism_3 == '')]

## Treatment ------------------------------------------------------------------------

# Treatment indicator: 0 when the control item `cntrl` holds an answer,
# 1 otherwise.
# NOTE(review): a row with both `cntrl` and `treat_china` empty is coded as
# treated, because the filter below is disabled -- confirm this is intended.
#fullData <- fullData[!(cntrl == '' & treat_china == '')]
fullData$treat <- ifelse(nchar(fullData$cntrl) > 0, 0L, 1L)

## Outcome: Policy Support -------------------------------------------------------------------

# Glue the control and treatment answers into one column (presumably only
# one of the two is non-empty for any respondent -- verify).
fullData[, policy_support := paste0(cntrl, treat_china)]

# Binary outcome: TRUE for any answer containing "support", in any case.
fullData[, policy_support_bin := grepl("support", policy_support,
                                       ignore.case = TRUE)]

# Integer
# The position in the ordered answer scale is the score (1 = strongly
# oppose ... 6 = strongly support); any other text becomes NA.
fullData[, policy_support_int := match(policy_support,
                                       c("Strongly oppose",
                                         "Oppose",
                                         "Slightly oppose",
                                         "Slightly support",
                                         "Support",
                                         "Strongly support"))]

## Outcome: Fed Trust ------------------------------------------------------------------------

# Store the trust item as an integer.
fullData[, trust := as.integer(fed_trust_1)]
#table(fullData$trust, fullData$fed_trust_1)

## Stock ownership ------------------------------------------------------------------

# Show the raw answers before they are overwritten with a logical flag.
table(fullData$stock)
fullData[, stock := stock == "Yes"]


## College --------------------------------------------------------------------------

#table(fullData$college)

# TRUE when the education answer mentions a bachelor's degree or higher.
fullData[, college_bin := grepl("professional|master|bachelor|doctorate",
                                college,
                                ignore.case = TRUE)]

#table(fullData[ , .(college, college_bin)])

## Gender ---------------------------------------------------------------------------

# TRUE for respondents whose Prolific sex field is exactly "Female".
fullData[, female := sex == "Female"]
#table(fullData$sex, fullData$female)


## Income ---------------------------------------------------------------------------

# Show the raw Prolific income labels.
table(fullData$income)

# Map each income bracket to its rank (1 = lowest, 13 = highest). The position
# in this ordered vector is the code. Labels that are not listed become NA
# directly, instead of passing through a character recode and relying on
# as.integer() coercion (which warned, and would have accepted any stray
# numeric-looking label as a valid code).
fullData$income_int <- match(fullData$income,
                             c("Less than $10000",
                               "$10000–$15999",
                               "$16000–$19999",
                               "$20000–$29999",
                               "$30000–$39999",
                               "$40000–$49999",
                               "$50000–$59999",
                               "$60000–$69999",
                               "$70000–$79999",
                               "$80000–$89999",
                               "$90000–$99999",
                               "$100000–$149999",
                               "More than $150000"))

# Cross-tabulate codes against labels to check the mapping.
table(fullData$income_int, fullData$income)

# Collapse the 13 brackets into four household-income bins: "49" = codes 1-6
# (below $50000), "99" = codes 7-11, "149" = code 12, "150" = code 13.
# Rows with a missing code match no condition and stay NA.
fullData <- mutate(fullData, hhiBin = case_when(income_int >= 1 & income_int <= 6  ~ "49", 
                                                income_int >= 7 & income_int <= 11 ~ "99",
                                                income_int == 12            ~ "149",
                                                income_int == 13            ~ "150"))

## Age ------------------------------------------------------------------------------

# Convert age to integer.
fullData[, age := as.integer(age)]

# Bucket age; each label is the lower bound of its bracket. Ages that match
# no bracket (below 18 or missing) are left as NA.
fullData <- fullData %>%
  mutate(ageBin = case_when(
    18 <= age & age <= 24 ~ "18",
    25 <= age & age <= 39 ~ "25",
    40 <= age & age <= 59 ~ "40",
    60 <= age             ~ "60"
  ))

# Fix the level order so the youngest bracket comes first.
fullData$ageBin <- factor(fullData$ageBin,
                          levels = as.character(c(18, 25, 40, 60)))

# One indicator column per age bin and per income bin.
fullData <- dummy_cols(fullData,
                       select_columns = c("ageBin", "hhiBin"),
                       ignore_na = TRUE)

## Partisanship ---------------------------------------------------------------------

# Show the raw party answers.
table(fullData$party_id_1)

# Party as a factor with Independent as the first level; answers outside
# these three levels become NA.
party_levels <- c("Independent", "Democrat", "Republican")
fullData[, party := factor(party_id_1, levels = party_levels)]

# Build one indicator column per party. dummy_cols() on a bare vector names
# its output ".data_<level>". The three indicators are selected and renamed
# BY NAME rather than by position, so each column is guaranteed to carry the
# party it is labelled with, regardless of the column order dummy_cols()
# returns or of an extra NA indicator column. A missing indicator now fails
# loudly instead of being silently mislabelled.
party_dummies <- as.data.frame(dummy_cols(fullData$party))
party_dummies <- party_dummies[paste0(".data_", party_levels)]
names(party_dummies) <- party_levels

fullData <- cbind(fullData, party_dummies)

rm(party_levels, party_dummies)


# Create nationalism index ------------------------------------------------

# Quick look at the three raw nationalism items.
fullData[, paste0("nat_superiority", 1:3), with = FALSE]

# Each item is scored 0-3 by its position on an answer scale ordered from
# least to most nationalist; answers outside the scale become NA.

# Nat1
# How many things about America make you ashamed?
# Very many, many, not many, none
fullData$nat1_int <- match(fullData$nat_superiority1,
                           c("Very many", "Many", "Not many", "None")) - 1

# Nat2
# How superior is the United States compared to other nations?
# Vastly superior, very superior, not so superior, not at all superior
fullData$nat2_int <- match(fullData$nat_superiority2,
                           c("Not at all superior",
                             "Not so superior",
                             "Very superior",
                             "Vastly superior")) - 1

#table(fullData[, c('nat_superiority2', 'nat2_int')])

# Nat 3
# I would rather be a citizen of America than of any other country in the world.
# Strongly agree, somewhat agree, somewhat disagree, strongly disagree
fullData$nat3_int <- match(fullData$nat_superiority3,
                           c("Strongly disagree",
                             "Somewhat disagree",
                             "Somewhat agree",
                             "Strongly agree")) - 1

#table(fullData[, c('nat_superiority3', 'nat3_int')])

# Index = sum of the three item scores divided by 9 (the maximum sum), so it
# runs from 0 to 1. It is NA whenever any item is NA.
fullData[, nat_total_int := (nat1_int + nat2_int + nat3_int) / 9]

#hist(fullData$nat_total_int,
#     xlab = 'Nationalism Index',
#     main = 'Histogram of Nationalism Index')


# Zero sum attitude -----------------------------------------------------------------

#table(fullData$zero_sum1)
#table(fullData$zero_sum2)

# 1 when the respondent picked both listed answers (fewer US jobs AND more
# jobs in other countries), 0 otherwise.
fullData[, zero_sum := ifelse(
  zero_sum1 == "Decrease jobs in the United States" &
    zero_sum2 == "Increased jobs in other countries",
  1, 0
)]

#table(fullData$zero_sum)


# Internationalist attitudes --------------------------------------------------------

#table(fullData$internationalism_1)
#table(fullData$internationalism_2)
#table(fullData$internationalism_3)
#table(fullData$internationalism_4)
#table(fullData$internationlaism_5)

# correct misnamed variable (no-op when the misspelled column is absent)
names(fullData)[names(fullData) == "internationlaism_5"] <- "internationalism_5"

# Score every item 0-4 on the agreement scale, writing the result to
# internationalism_<k>int.
# Qs 1-3, Agree is more internationalist
# Qs 4-5, disagree is more internationalist, so their score is flipped
for (item in 1:5) {
  item_score <- recode(fullData[[paste0("internationalism_", item)]],
                       "Strongly disagree"          = 0L,
                       "Somewhat disagree"          = 1L,
                       "Neither agree nor disagree" = 2L,
                       "Somewhat agree"             = 3L,
                       "Strongly agree"             = 4L)
  if (item >= 4) {
    item_score <- 4L - item_score
  }
  score_col <- paste0("internationalism_", item, "int")
  fullData[, (score_col) := item_score]
}
rm(item, item_score, score_col)

# Index = sum of the five item scores divided by 20 (the maximum sum), so it
# runs from 0 to 1. It is NA whenever any item is NA.
fullData[, internationalism_total_int := Reduce(`+`, .SD) / 20,
         .SDcols = paste0("internationalism_", 1:5, "int")]

#hist(fullData$internationalism_total_int,
#     xlab = 'Internationalism Index',
#     main = 'Histogram of Internationalism Index')

# Hand the cleaned table over as `survey2` and retire the working name.
survey2 <- fullData
rm(list = "fullData")